package megaloscope

import (
	"bufio"
	"os"
	"strings"

	"github.com/Chain-Zhang/pinyin"
)

// WordSlice is a list of words.
type WordSlice []string

// Rule is a single parsed rule.
type Rule struct {
	Raw            string    // the rule as originally defined
	Words          WordSlice // combination of Chinese words
	ExcludeWords   WordSlice // exclusion words
	WordsPY        WordSlice // pinyin of Words
	ExcludeWordsPY WordSlice // pinyin of ExcludeWords
}

// Megaloscope is the sensitive-word detector.
type Megaloscope struct {
	AllWords       WordSlice     // all words
	AllWordsPY     WordSlice     // pinyin of all words
	AllRules       map[int]*Rule // all rules
	WordsMatcher   *Matcher
	WordsPYMatcher *Matcher
}

// NewMegaloscope creates a Megaloscope with an empty rule map and loads its
// rules from the file at filepath. The error returned by loadRules is
// discarded, so a failed load is not reported to the caller.
func NewMegaloscope(filepath string) *Megaloscope {
	detector := new(Megaloscope)
	detector.AllRules = map[int]*Rule{}
	// The signature has no error return, so the load error is dropped here.
	_ = detector.loadRules(filepath)
	return detector
}

// parseRule parses one rule line into a Rule.
//
// The text before the first "^" is split on "+" into Words. When the line
// contains a "^", the text between the first and second "^" (or up to the end
// of the line) is split on "|" into ExcludeWords; otherwise ExcludeWords is
// left nil. Anything after a second "^" is ignored. Raw keeps the line
// unchanged.
func (m *Megaloscope) parseRule(line string) *Rule {
	sections := strings.Split(line, "^")
	rule := &Rule{
		Raw:   line,
		Words: strings.Split(sections[0], "+"),
	}
	// More than one section means the line contained at least one "^".
	if len(sections) > 1 {
		rule.ExcludeWords = strings.Split(sections[1], "|")
	}
	return rule
}

// loadRules reads rule definitions from the file at filepath, one rule per
// line, and rebuilds the word lists, the per-rule pinyin and both matchers.
//
// Every line is parsed with parseRule and stored in m.AllRules under its
// zero-based line number. The words and exclusion words of all rules are
// de-duplicated in first-seen order into m.AllWords; m.AllWordsPY holds the
// pinyin of each entry at the same index.
//
// It returns the error from opening the file, or the error that stopped
// reading it (including a line longer than the 1 MiB limit). A read error does
// not discard the rules read before it: the word lists and matchers are still
// built from them, so the receiver stays usable for callers that ignore the
// returned error.
func (m *Megaloscope) loadRules(filepath string) error {
	// Upper bound on the length of a single rule line.
	const maxLineBytes = 1 << 20

	fi, err := os.Open(filepath)
	if err != nil {
		return err
	}
	defer fi.Close()

	// Guard against a receiver that was not created by NewMegaloscope;
	// writing to a nil map would panic.
	if m.AllRules == nil {
		m.AllRules = make(map[int]*Rule)
	}

	// A Scanner with an enlarged buffer reads each line as one token, so a
	// long rule line is never cut into several rules.
	scanner := bufio.NewScanner(fi)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	// Collect the distinct words in first-seen order so that AllWords and
	// AllWordsPY are the same on every run.
	seen := make(map[string]struct{})
	words := make(WordSlice, 0)
	for i := 0; scanner.Scan(); i++ {
		rule := m.parseRule(scanner.Text())
		m.AllRules[i] = rule
		for _, group := range []WordSlice{rule.Words, rule.ExcludeWords} {
			for _, w := range group {
				if _, dup := seen[w]; dup {
					continue
				}
				seen[w] = struct{}{}
				words = append(words, w)
			}
		}
	}
	// Err is nil at a clean end of file; anything else is reported to the
	// caller once the state below has been built.
	readErr := scanner.Err()

	// Compute the pinyin of every distinct word once.
	pyByWord := make(map[string]string, len(words))
	wordsPY := make(WordSlice, len(words))
	for i, w := range words {
		// The conversion error is ignored, as before, and py is stored as
		// returned. NOTE(review): confirm what Convert yields on failure.
		py, _ := pinyin.New(w).Split("").Mode(pinyin.InitialsInCapitals).Convert()
		pyByWord[w] = py
		wordsPY[i] = py
	}
	m.AllWords = words
	m.AllWordsPY = wordsPY

	// Fill in the pinyin fields of every rule.
	for _, rule := range m.AllRules {
		rule.WordsPY = make(WordSlice, len(rule.Words))
		for i, w := range rule.Words {
			rule.WordsPY[i] = pyByWord[w]
		}
		if len(rule.ExcludeWords) > 0 {
			rule.ExcludeWordsPY = make(WordSlice, len(rule.ExcludeWords))
			for i, w := range rule.ExcludeWords {
				rule.ExcludeWordsPY[i] = pyByWord[w]
			}
		}
	}

	m.WordsPYMatcher = BuildNewMatcher(m.AllWordsPY)
	m.WordsMatcher = BuildNewMatcher(m.AllWords)
	return readErr
}
